* WINDOWS PREAMBLE.
#delimit ;
* FROM HERE ON EVERY COMMAND AND EVERY STAR COMMENT ENDS AT A SEMICOLON ;
* THE THREE GLOBALS BELOW ARE PLACEHOLDERS AND MUST BE EDITED BEFORE RUNNING ;
* rawDir IS THE FOLDER THE IPUMS EXTRACT FILES ARE COPIED FROM ;
global rawDir "LOCATION OF RAW CPS DATA";
* dataDir IS THE FOLDER ALL INTERMEDIATE AND FINAL DATASETS ARE WRITTEN TO ;
global dataDir "WORKING DIRECTORY";
* currDir IS THE FOLDER THE LOG FILE IS WRITTEN TO ;
global currDir "PROGRAM DIRECTORY";
cd "$currDir" ;
* CLOSE ANY LOG LEFT OPEN BY AN EARLIER RUN - capture HIDES THE ERROR WHEN NONE IS OPEN ;
capture log close ;
clear all ;
log using 8_MarriedCouples_readInCps.log, replace text;


***************************************************************************;
* READ IN CPS DATA FROM IPUMS FOR EITC EMPLOYMENT PROJECT ;
* MARRIED VERSION ;
***************************************************************************;

* REMEMBER THE START TIME SO IT CAN BE PRINTED NEXT TO THE END TIME AT THE BOTTOM OF THE LOG ;
local beginTime "DateTime: $S_DATE $S_TIME";
set more off ;
set linesize 200 ;
* NOTE(REVIEW) - set mem IS A LEGACY MEMORY SETTING THAT NEWER STATA RELEASES ARE BELIEVED TO IGNORE - CONFIRM BEFORE REMOVING ;
set mem 5g ;

* EVERY DATASET NAMED WITHOUT A PATH BELOW IS READ FROM OR SAVED TO THIS FOLDER ;
cd "$dataDir" ;




***************************************************************************;
* DEFINE STUFF THAT WE DONT EVER WANT TO COMMENT OUT. ;
***************************************************************************;
* THE THREE LISTS BELOW ARE CONCATENATED INTO THE KEEP LIST APPLIED TO THE IPUMS EXTRACT ;
* technical_vars HOLDS THE RECORD KEYS (year serial pernum ...) AND THE WEIGHT VARIABLES ;
local technical_vars "year serial relate famunit pernum wtsupp spmwt hwtsupp";
* demographic_vars HOLDS EVERYTHING ELSE THAT IS NOT AN INCOME COMPONENT ;
local demographic_vars "age marst race sex hispan educ
	statefip wkswork1 uhrswork wksunem1 whynwly mthwelfr schlcoll
	nfams nmothers momloc momrule poploc poprule sploc foodstmp
	pubhous rentsub heatsub lunchsub offtotval stampval heatval union
	qincwage poverty offcutoff offpov spmpov spmthresh spmtotres spmeitc
	spmfedtaxac spmfedtaxbc" ;
* income_vars IS ALSO THE LIST THAT GETS SPECIAL CODES RECODED AND LATER RECEIVES H AND W SUFFIXES ;
* inctot COMES FIRST ON PURPOSE OR BY ACCIDENT - THE RECODE LOOP TESTS inctot WHILE WALKING THIS LIST ;
local income_vars "inctot incwage incbus incfarm incss incwelfr incgov
	incaloth incretir incssi incdrt incint incunemp incwkcom incvet incsurv
	incdisab incdivid incrent inceduc incchild incalim incasist incother  ";


***************************************************************************;
* READ IN IPUMS CPS DATA ;
***************************************************************************;

* A LIST OF VARIABLES WE WANT TO KEEP FROM THE DATA ;
local keepList "`technical_vars' `demographic_vars' `income_vars'" ;
* STEM SHARED BY THE COMPRESSED DATA FILE AND THE IPUMS DICTIONARY DO-FILE ;
local cps "cps_00017";
* REMOVE LEFTOVERS FROM ANY EARLIER RUN - capture HIDES THE ERROR WHEN A FILE IS ABSENT ;
capture erase `cps'.do ;
capture erase `cps'.dat ;
capture erase `cps'.dat.gz ;
* COPY THE EXTRACT INTO THE WORKING DIRECTORY AND UNPACK IT WITH 7za ;
* BOTH PATHS ARE QUOTED SO THAT DIRECTORY NAMES CONTAINING SPACES WORK ;
* FORWARD SLASHES ARE USED BECAUSE A BACKSLASH IN FRONT OF A MACRO CAN SUPPRESS ITS EXPANSION ;
local from "$rawDir/`cps'.dat.gz" ;
local to "$dataDir/`cps'.dat.gz"  ;
copy "`from'" "`to'" , replace;
shell 7za e `cps'.dat.gz ;
local from "$rawDir/`cps'.do" ;
local to "$dataDir/`cps'.do"  ;
copy "`from'" "`to'" , replace;
* THE IPUMS DO-FILE READS THE UNPACKED DATA INTO MEMORY ;
quietly do `cps'.do ;
keep `keepList' ;
* KEEP AN UNTOUCHED COPY OF WAGES - THE RECODE BELOW CAN BLANK incwage ;
gen incwageraw = incwage ;
* STEP 1 - TURN SPECIAL CODES AND ANY VALUE ABOVE ONE MILLION INTO MISSING ;
* THE inlist CODES PLUS THE ONE MILLION CUTOFF COVER EVERY VALUE THE ORIGINAL
  LONGER LIST OF EQUALITY TESTS COVERED BECAUSE ALL ITS OTHER CODES EXCEED ONE MILLION ;
* inctot IS FIRST IN THE LIST SO ONCE IT IS BLANKED THE inctot TEST BLANKS
  EVERY OTHER INCOME COMPONENT OF THAT PERSON AS WELL ;
foreach var of varlist `income_vars' {;
	qui replace `var' = . if (inlist(`var', -19998, -9999, 9999, 99997, 99998,
		99999, 999997, 999999) | `var' > 1000000 | inctot == .);
	};
* STEP 2 - TREAT EVERY MISSING INCOME AMOUNT AS ZERO ;
foreach var of varlist `income_vars' {;
	qui replace `var' = 0 if `var' == . ;
	};
summ `income_vars'  ;
save WorkingFullCPSTEMP_married, replace ;
* THE RAW EXTRACT IS NO LONGER NEEDED IN THE WORKING DIRECTORY ;
capture erase `cps'.do ;
capture erase `cps'.dat ;
capture erase `cps'.dat.gz ;
clear ;
 

***************************************************************************;
* BUILDING A MARRIED COUPLE FILE FROM THE INDIVIDUAL FILE ;
***************************************************************************;

* ind_vars ARE PERSON-LEVEL VARIABLES THAT GET AN H OR W SUFFIX IN THE COUPLE FILE ;
local ind_vars "wtsupp age race hispan educ schlcoll whynwly incwageraw";

* hh_vars ARE CARRIED UNSUFFIXED AND ARE TAKEN FROM THE WIFE RECORD ONLY ;
local hh_vars	"statefip stampval heatval poverty offcutoff offpov spmpov
	spmthresh spmtotres spmeitc spmfedtaxac spmfedtaxbc";

* BUILD A TEMPORARY FILE OF ALL MALES (sex == 1) KEYED BY THE PERSON NUMBER OF THEIR SPOUSE ;
use WorkingFullCPSTEMP_married;
keep year serial sploc pernum sex `ind_vars' `income_vars';
keep if sex == 1 ;
drop sex ;
foreach var in `ind_vars' `income_vars' {;
	* SUFFIX H MARKS THE HUSBAND COPY OF EACH VARIABLE ;
	rename `var' `var'H;
	};
* THE MANS OWN PERSON NUMBER IS KEPT AS husband_pernum AND sploc TAKES OVER THE NAME pernum
  SO THAT THE MERGE KEY LINES UP WITH THE PERSON NUMBER ON THE WIFE RECORD ;
* NO FILTER ON MARITAL STATUS OR sploc IS APPLIED HERE SO THE KEY MAY REPEAT WITHIN A HOUSEHOLD -
  PRESUMABLY THE REASON THE LATER MERGE IS DECLARED 1:m ;
rename pernum husband_pernum ;
rename sploc pernum ;
sort year serial pernum ;
save men, replace ;
clear ;
* BUILD ONE ROW PER MARRIED WOMAN AND ATTACH HER HUSBAND FROM THE men FILE ;
use WorkingFullCPSTEMP_married ;
keep year serial pernum sex marst `ind_vars' `income_vars' `hh_vars';
* COUNT HOUSEHOLD MEMBERS WHILE EVERY PERSON RECORD IS STILL IN MEMORY ;
* COUNTING AFTER THE FILTERS BELOW WOULD ONLY COUNT MARRIED WOMEN PER HOUSEHOLD
  WHICH WOULD DISTORT THE LATER (2+qualChild)/(hhsize) BENEFIT SHARE ;
* ASSUMES THE EXTRACT HOLDS ALL PERSONS OF EACH HOUSEHOLD - CONFIRM ;
sort year serial pernum ;
by year serial : gen hhsize = _N ;
* RESTRICT TO WOMEN (sex == 2) WITH marst == 1 ;
keep if marst == 1 ;
keep if sex == 2 ;
drop sex marst ;
foreach var in `ind_vars' `income_vars' {;
	* SUFFIX W MARKS THE WIFE COPY OF EACH VARIABLE ;
	rename `var' `var'W;
	};
sort year serial pernum ;
* pernum IN men IS THE SPOUSE POINTER OF EACH MAN SO A MATCH PAIRS A WIFE WITH HER HUSBAND ;
* 1:m BECAUSE THE POINTER IS NOT GUARANTEED TO BE UNIQUE IN men ;
merge 1:m year serial pernum using men ;
keep if _merge == 3 ;
drop _merge ;
rm men.dta ;
rename pernum wife_pernum ;
sort year serial wife_pernum ;
save marriedCouples, replace ;
clear ;

* COUNT QUALIFYING CHILDREN PER MOTHER POINTER AND ADD UP THEIR WEIGHTS ;
use WorkingFullCPSTEMP_married ;
keep year serial momloc schlcoll age wtsupp;
* A PERSON QUALIFIES IF AGED 18 OR LESS OR IF OVER 18 AND UNDER 24 WITH schlcoll CODE 1 OR 3 ;
drop if !(age<=18 | (age>18 & age<24 & inlist(schlcoll, 1, 3))) ;
* EACH REMAINING ROW CONTRIBUTES ONE TO THE SUM TAKEN BY collapse ;
gen qualChild = 1 ;
sort year serial momloc ;
* THE WEIGHT SUM IS NAMED chldwt DIRECTLY INSIDE collapse ;
collapse (sum) qualChild chldwt = wtsupp, by(year serial momloc);
* momloc BECOMES THE KEY THAT MATCHES wife_pernum IN THE COUPLE FILE ;
rename momloc wife_pernum ;
sort year serial wife_pernum ;
save qualifyingChildren, replace ;
clear ;

* ADD THE CHILD COUNT TO EACH COUPLE - CHILD GROUPS WITHOUT A MATCHING WIFE ARE DISCARDED ;
use marriedCouples ;
merge 1:1 year serial wife_pernum using qualifyingChildren ,
	keep(master match) nogenerate ;
erase marriedCouples.dta ;
erase qualifyingChildren.dta ;
* COUPLES WITH NO MATCHING CHILD GROUP HAVE ZERO QUALIFYING CHILDREN ;
replace qualChild = 0 if qualChild == .;

* REBUILD income_vars SO THAT IT NAMES THE SUFFIXED HUSBAND AND WIFE COLUMNS ;
local temp "" ;
foreach v of local income_vars {;
	local temp "`temp' `v'H `v'W" ;
	};
local income_vars "`temp'" ;


* EMPLOYED - ONE FLAG PER SPOUSE BASED ON THE UNRECODED WAGE COPY ;
* NOTE(REVIEW) - incwageraw WAS COPIED BEFORE THE SPECIAL CODES WERE RECODED AND STATA TREATS
  MISSING AS LARGER THAN ANY NUMBER SO RESERVED CODES OR MISSING WAGES ALSO COUNT AS POSITIVE -
  CONFIRM THIS IS INTENDED ;
*summ incwageraw* , d ;
gen byte employedW = (incwagerawW > 0) ;
gen byte employedH = (incwagerawH > 0) ;
*tabstat employedH employedW [aw=wtsupp], by(year) s(mean) ;

* CREATE UNIFORM EDUCATION BINS ;
* THE BIN IS CHOSEN FROM THE TENS PART OF educ -
  BELOW 7 GIVES 11 AND 7 GIVES 12 AND 8 TO 10 GIVES 13 AND 11 GIVES 16 AND 12 TO 89 GIVES 17 ;
* ANYTHING ELSE INCLUDING A MISSING educ LEAVES hgc MISSING ;
foreach i in H W {;
	local bin "floor(educ`i'/10)" ;
	gen byte hgc`i' = cond(`bin' < 7, 11,
		cond(`bin' == 7, 12,
		cond(inrange(`bin', 8, 10), 13,
		cond(`bin' == 11, 16,
		cond(inrange(`bin', 12, 89), 17, .))))) ;
	*tab year hgc`i' , missing;
	};
* TAX YEAR - SHIFT year BACK BY ONE ;
replace year = year - 1;


***************************************************************************;
* RUN THROUGH TAXSIM ;
***************************************************************************;
* STATE ;
* MAP statefip ONTO A GAP-FREE 1 TO 51 CODE - THE RANGES BELOW SKIP 3 7 14 43 AND 52
  AND EACH RANGE SUBTRACTS THE NUMBER OF SKIPPED VALUES BELOW IT ;
* ANY statefip OUTSIDE THESE RANGES LEAVES state MISSING ;
* PRESUMABLY THIS IS THE STATE NUMBERING ankurs_taxsim EXPECTS - CONFIRM ;
gen byte state = . ;
replace state = statefip   if statefip <= 2 ;
replace state = statefip-1 if statefip >= 4  & statefip <= 6 ;
replace state = statefip-2 if statefip >= 8  & statefip <= 13 ;
replace state = statefip-3 if statefip >= 15 & statefip <= 42 ;
replace state = statefip-4 if statefip >= 44 & statefip <= 51 ;
replace state = statefip-5 if statefip >= 53 & statefip <= 56 ;
*tab state, missing ;

* INPUT COLUMNS HANDED TO ankurs_taxsim FURTHER DOWN ;
* DEPENDENTS ARE THE QUALIFYING CHILDREN CAPPED AT FIVE ;
gen byte depx = min(qualChild, 5) ;
gen byte depchild = depx ;
gen byte mstat = 2 ;
* EARNINGS ARE WAGE PLUS BUSINESS PLUS FARM INCOME FLOORED AT ZERO - HUSBAND IN pwages AND WIFE IN swages ;
gen pwages = max(incwageH+incbusH+incfarmH,0) ;
gen swages = max(incwageW+incbusW+incfarmW,0) ;
gen byte dividends = 0 ;
gen byte agex=0 ;
* THE gssi COMPONENTS ARE LISTED ONE BY ONE ON PURPOSE ;
* A WILDCARD LIKE incss* ALSO MATCHES incssiH AND incssiW SO COMBINING IT WITH incssi*
  PUTS THE SSI COLUMNS IN THE LIST TWICE AND rowtotal THEN ADDS THEM TWICE ;
egen gssi = rowtotal(incssH incssW incssiH incssiW incsurvH incsurvW
	incdisabH incdisabW);
egen pensions = rowtotal(incretir*);
egen ui = rowtotal(incunemp*);
* otherprop COLLECTS THE NEGATIVE PART OF EACH SPOUSES EARNINGS (THE PART FLOORED AWAY ABOVE)
  PLUS THE REMAINING NON-TRANSFER INCOME ITEMS OF BOTH SPOUSES ;
gen otherprop = min(incwageH+incbusH+incfarmH,0) +
	min(incwageW+incbusW+incfarmW,0) +
	incalothH + incalimH + incotherH + incdrtH + incintH + incdividH + increntH +
	incalothW + incalimW + incotherW + incdrtW + incintW + incdividW + increntW ;
* NONE OF THE PREFIXES BELOW IS A PREFIX OF ANOTHER INCOME VARIABLE SO THE WILDCARDS ARE SAFE HERE ;
egen transfers = rowtotal(incwelfr* incgov* incwkcom* incvet* inceduc*
	incchild* incasist*) ;

sort year serial ;
save cpsBeforeTaxsim, replace ;

* CURRENT-YEAR RUN - KEEP ONLY THE COUPLE KEYS AND THE INPUT COLUMNS BEFORE CALLING THE TAX CALCULATOR ;
keep year serial wife_pernum state agex depx depchild mstat
	pwages swages dividends gssi pensions ui otherprop transfers;
* REMOVE msg.txt AND taxsim_out.dta BEFORE AND AFTER THE CALL -
  PRESUMABLY SCRATCH FILES WRITTEN BY ankurs_taxsim - CONFIRM ;
capture rm msg.txt;
capture rm taxsim_out.dta;
* ankurs_taxsim IS NOT DEFINED IN THIS FILE - THE CODE BELOW RELIES ON IT LEAVING
  fiitax fica siitax AND THE NUMBERED v COLUMNS IN MEMORY ALONGSIDE THE KEYS ;
ankurs_taxsim, full replace;
capture rm msg.txt;
capture rm taxsim_out.dta;
keep year serial wife_pernum fiitax fica siitax v25 v39 v22 v23 v19 ;
* GIVE THE NUMBERED OUTPUT COLUMNS THE NAMES USED IN THE REST OF THIS FILE ;
* NOTE(REVIEW) - THE MEANING OF EACH v NUMBER IS TAKEN ON TRUST FROM THESE RENAMES - VERIFY AGAINST THE TAXSIM OUTPUT LAYOUT ;
rename v25 fedEIC;
rename v39 stEIC ;
rename v22 ctc ;
rename v23 actc ;
rename v19 fedRegTax ;
sort year serial wife_pernum ;
save CYtaxsim, replace ;
clear ;

* PRIOR-YEAR RUN - RE-RUN THE TAX CALCULATOR WITH year SET BACK BY ONE AND WITH
  DOLLAR INPUTS DEFLATED TO THAT YEAR THEN RE-INFLATE THE RESULTS ;
use cpsBeforeTaxsim ;
keep year serial wife_pernum state agex depx depchild mstat
	pwages swages dividends gssi pensions ui otherprop transfers;
* m:1 MERGES ASSERT THAT cpi HAS ONE ROW PER year AND NEED NO PRE-SORTING ;
* keep(match) DROPS ROWS WHOSE year HAS NO CPI VALUE AND ALSO DROPS UNUSED CPI YEARS ;
merge m:1 year using cpi , keep(match) nogenerate ;
rename cpi currentYearCpi ;
replace year = year - 1 ;
merge m:1 year using cpi , keep(match) nogenerate ;
rename cpi priorYearCpi ;
* DEFLATE THE DOLLAR INPUTS FROM CURRENT-YEAR TO PRIOR-YEAR PRICES ;
foreach dollar of varlist pwages swages gssi
		otherprop pensions transfers ui {;
	replace `dollar' = `dollar' * (priorYearCpi/currentYearCpi);
	};
capture rm msg.txt;
capture rm taxsim_out.dta;
* ankurs_taxsim IS NOT DEFINED IN THIS FILE - THE keep BELOW RELIES ON IT LEAVING
  THE KEYS AND BOTH CPI COLUMNS IN MEMORY ;
ankurs_taxsim, full replace;
capture rm msg.txt;
capture rm taxsim_out.dta;
keep year serial wife_pernum v25 v39 v22 v23 v19 priorYearCpi currentYearCpi ;
* THE _ly SUFFIX MARKS RESULTS COMPUTED WITH year SET ONE YEAR BACK ;
rename v25 fedEIC_ly;
rename v39 stEIC_ly;
rename v22 ctc_ly;
rename v23 actc_ly;
rename v19 fedRegTax_ly ;
* BRING THE RESULTS BACK TO CURRENT-YEAR PRICES ;
foreach dollar of varlist fedEIC_ly stEIC_ly ctc_ly actc_ly fedRegTax_ly {;
	replace `dollar' = `dollar' * (currentYearCpi/priorYearCpi);
	};
drop priorYearCpi currentYearCpi ;
* RESTORE year SO THE FILE MERGES BACK ONTO cpsBeforeTaxsim ;
replace year = year + 1 ;
sort year serial wife_pernum;
save LYtaxsim, replace ;
clear ;

* ATTACH BOTH SETS OF TAX RESULTS TO THE COUPLE FILE - UNMATCHED ROWS ARE KEPT ;
use cpsBeforeTaxsim ;
merge 1:1 year serial wife_pernum using CYtaxsim , nogenerate ;
merge 1:1 year serial wife_pernum using LYtaxsim , nogenerate ;
* THE TAX CALCULATOR INPUT COLUMNS HAVE SERVED THEIR PURPOSE ;
drop depx depchild mstat pwages swages dividends gssi pensions ui
	otherprop transfers;
*rm cpsBeforeTaxsim.dta ;
erase CYtaxsim.dta ;
erase LYtaxsim.dta ;

* NAMES OF ALL TAX RESULT COLUMNS - USED BY THE INFLATION ADJUSTMENT BELOW ;
local taxsim_output "fiitax fica siitax fedEIC stEIC ctc actc fedRegTax" ;
local taxsim_output "`taxsim_output' fedEIC_ly stEIC_ly ctc_ly actc_ly fedRegTax_ly" ;



***************************************************************************;
* INFLATION - EVERYTHING IN 2012 ;
***************************************************************************;
* INDEX VALUE OF THE BASE PERIOD THAT ALL DOLLAR AMOUNTS ARE CONVERTED TO ;
* NOTE(REVIEW) - THE SECTION HEADER SAYS 2012 BUT 232.957 LOOKS LIKE THE 2013 CPI-U ANNUAL AVERAGE -
  CONFIRM AGAINST THE cpi FILE ;
local baseCpi 232.957 ;
* m:1 ASSERTS THAT cpi HAS ONE ROW PER year AND NEEDS NO PRE-SORTING ;
merge m:1 year using cpi ;
tab _merge;
keep if _merge == 3 ;
drop _merge ;
* RESCALE EVERY INCOME AND TAX COLUMN PLUS THE TWO BENEFIT VALUES ;
foreach dollar of varlist `income_vars' `taxsim_output'
		stampval heatval  {;
	qui replace `dollar' = `dollar' * (`baseCpi'/cpi);
	};

* MISSING BENEFIT VALUES COUNT AS ZERO AND EACH VALUE IS THEN SCALED BY
  THE RATIO OF (2 + qualChild) TO hhsize ;
foreach benefit in heatval stampval {;
	replace `benefit' = 0 if `benefit'==.;
	replace `benefit' = `benefit'*(2+qualChild)/(hhsize) ;
	};

* atti IS BOTH SPOUSES TOTAL INCOME PLUS THE SCALED BENEFITS MINUS fiitax siitax AND fica ;
gen atti = inctotH+inctotW+stampval+heatval-fiitax-siitax-fica ;

summ ;
save cpsAfterTaxsim, replace ;


*/
*;
* PRINT THE START TIME SAVED AT THE TOP OF THE RUN FOLLOWED BY THE CURRENT TIME THEN CLOSE THE LOG ;
disp "`beginTime'" ;
disp "DateTime: $S_DATE $S_TIME";
log close ;
